# devtools::install_github("ropensci/plotly")
library(plotly)
Loading required package: ggplot2
Registered S3 method overwritten by 'dplyr':
method from
print.rowwise_df
Need help? Try Stackoverflow: https://stackoverflow.com/tags/ggplot2.
Registered S3 method overwritten by 'data.table':
method from
print.data.table
Registered S3 methods overwritten by 'htmltools':
method from
print.html tools:rstudio
print.shiny.tag tools:rstudio
print.shiny.tag.list tools:rstudio
Registered S3 method overwritten by 'htmlwidgets':
method from
print.htmlwidget tools:rstudio
Attaching package: 'plotly'
The following object is masked from 'package:ggplot2':
last_plot
The following object is masked from 'package:stats':
filter
The following object is masked from 'package:graphics':
layout
library(DAAG)
Loading required package: lattice
library(ggplot2)
# Preview the first rows of the `ais` data set (presumably provided by the
# DAAG package loaded above).
head(ais)
# Rescale a numeric vector linearly onto [0, 1] (min-max normalisation).
#
# x: numeric vector.
# Returns a numeric vector of the same length as `x`, where the smallest
# value maps to 0 and the largest to 1. A constant vector (zero range) maps
# to all zeros instead of NaN (0 / 0), so that a constant column cannot
# poison downstream computations. Missing values propagate as before.
min_max <- function(x) {
  rng <- range(x)
  span <- rng[2] - rng[1]
  # isTRUE() keeps the NA case (span is NA) on the ordinary division path.
  if (isTRUE(span == 0)) {
    return(x - rng[1])
  }
  (x - rng[1]) / span
}
# Min-max scale each of the first 11 columns of `ais`; apply() works on the
# matrix form of the data and returns the scaled columns as a matrix.
x_train <- apply(X = as.matrix(ais[, 1:11]), MARGIN = 2, FUN = min_max)
head(x_train)
rcc wcc hc hg ferr bmi ssf pcBfat
1 0.05479452 0.3818182 0.06722689 0.09210526 0.23008850 0.2156197 0.4693287 0.4723988
2 0.20890411 0.4545455 0.09663866 0.14473684 0.26548673 0.2218449 0.4328704 0.5242556
3 0.11643836 0.1545455 0.02100840 0.00000000 0.05752212 0.2891907 0.4432870 0.4767481
4 0.10616438 0.1818182 0.05882353 0.13157895 0.26991150 0.2903226 0.5694444 0.6032118
5 0.22260274 0.3181818 0.23529412 0.31578947 0.09292035 0.1250707 0.3026620 0.4018066
6 0.10273973 0.1000000 0.06302521 0.11842105 0.15044248 0.2427844 0.2731481 0.3328873
lbm ht wt
1 0.4042434 0.7768595 0.4812646
2 0.3376605 0.6743802 0.4285714
3 0.2931323 0.4776860 0.3665105
4 0.3185371 0.5966942 0.4344262
5 0.2629816 0.5900826 0.3138173
6 0.2709380 0.4148760 0.3032787
# Principal component analysis of the scaled data (prcomp defaults), then
# show the fitted object: component standard deviations and loadings.
pca <- prcomp(x = x_train)
pca
Standard deviations (1, .., p=11):
[1] 0.385868910 0.289535274 0.197430062 0.175422597 0.148272925 0.111117183 0.053473287
[8] 0.033736286 0.028849470 0.012240374 0.005661196
Rotation (n x k) = (11 x 11):
PC1 PC2 PC3 PC4 PC5 PC6
rcc 0.33739235 -0.05838215 0.02150914 -0.358680602 0.21131404 -0.23211863
wcc 0.05689837 0.14962256 -0.19771247 -0.532764570 -0.79752803 0.12273024
hc 0.34607222 -0.06596026 0.02526416 -0.341668220 0.19414008 -0.15364551
hg 0.40981431 -0.05733335 -0.01043262 -0.356736075 0.26573951 -0.05898774
ferr 0.24216199 0.10387340 -0.89611758 0.300730010 0.02856695 -0.18827728
bmi 0.20640664 0.41353060 -0.01678115 -0.001072319 0.18943822 0.61774042
ssf -0.24981756 0.53068491 -0.03108384 -0.168754158 0.17155065 -0.20819204
pcBfat -0.34297756 0.51318779 -0.03458144 -0.212507756 0.18630827 -0.23411251
lbm 0.40746613 0.23293517 0.18540244 0.255736225 -0.11865756 0.15393803
ht 0.25609714 0.20005781 0.30599704 0.289957285 -0.30572044 -0.59795919
wt 0.28690117 0.38144384 0.16193012 0.173086378 -0.05098682 0.08669181
PC7 PC8 PC9 PC10 PC11
rcc 0.764342861 0.18766338 -0.188558338 0.0108028781 -0.0015251304
wcc -0.006124073 0.01459060 -0.008900196 -0.0017048843 0.0010849554
hc -0.135781504 -0.66536764 0.485478052 -0.0191951506 -0.0015475224
hg -0.614251195 0.41548964 -0.278004128 -0.0072450005 0.0048633771
ferr 0.007387012 -0.02722995 -0.006486023 0.0002535866 -0.0002313937
bmi 0.078145496 -0.08355718 -0.040218913 0.5875136360 0.0934749543
ssf 0.002470876 0.43873186 0.599635163 -0.0254040883 0.0323658233
pcBfat -0.065008567 -0.38168309 -0.529387128 -0.1636377027 0.1618446920
lbm 0.044556113 0.03621892 0.051215815 -0.5186314003 0.6060216964
ht -0.085340672 -0.01761235 -0.062260221 0.4996783929 0.0671823933
wt 0.018427026 -0.04136346 -0.058474104 -0.3289489617 -0.7695555910
# Cumulative proportion of variance explained by the principal components.
# prcomp() stores standard deviations in `sdev`; they are squared here to get
# variances before forming the proportions. The x axis follows the number of
# components actually present instead of a hard-coded 1:11.
ggplot(
  data.frame(
    n_components = seq_along(pca$sdev),
    cum_var_explained = cumsum(pca$sdev^2) / sum(pca$sdev^2)
  ),
  aes(x = n_components, y = cum_var_explained)
) +
  geom_line()
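Note that `pca$sdev` holds standard deviations, so the proportion of variance explained has to be computed from `sdev^2`. By that measure the first 5 components explain about 95% of the variance of the data (the first 4 about 88%); the figure of 90% for the first 6 components only holds for the cumulative share of the standard deviations.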
The plot below shows how the first 2 principal components separate males and females:
# Scatter plot of the PC1/PC2 scores, coloured by sex.
ggplot(
  as.data.frame(pca$x[, c("PC1", "PC2")]),
  aes(x = PC1, y = PC2, colour = ais$sex)
) +
  geom_point()
The plot below shows how the first 3 principal components separate males and females:
# devtools::install_github("ropensci/plotly")
library(plotly)
Registered S3 method overwritten by 'data.table':
method from
print.data.table
Registered S3 method overwritten by 'htmlwidgets':
method from
print.htmlwidget tools:rstudio
Attaching package: 'plotly'
The following object is masked from 'package:ggplot2':
last_plot
The following object is masked from 'package:stats':
filter
The following object is masked from 'package:graphics':
layout
# Interactive 3-D scatter of the PC1/PC2/PC3 scores, coloured by sex.
pca_plotly <- add_markers(
  plot_ly(
    as.data.frame(pca$x),
    x = ~PC1,
    y = ~PC2,
    z = ~PC3,
    color = ~ais$sex
  )
)
pca_plotly
minimal value for n is 3, returning requested palette with 3 different levels
minimal value for n is 3, returning requested palette with 3 different levels
minimal value for n is 3, returning requested palette with 3 different levels
minimal value for n is 3, returning requested palette with 3 different levels
# devtools::install_github("rstudio/tensorflow") # requires devtools to be installed first
library("tensorflow")
library("keras")
# Keras with the TensorFlow backend was already installed through Conda, so it is not installed again here.
# reticulate::py_discover_config() # shows the TensorFlow installations available to Python
keras::is_keras_available() # If this returns TRUE, Keras with TensorFlow is available
[1] TRUE
# Preview the first rows of the scaled training data.
head(x_train)
rcc wcc hc hg ferr bmi ssf pcBfat lbm ht
1 0.05479452 0.3818182 0.06722689 0.09210526 0.23008850 0.2156197 0.4693287 0.4723988 0.4042434 0.7768595
2 0.20890411 0.4545455 0.09663866 0.14473684 0.26548673 0.2218449 0.4328704 0.5242556 0.3376605 0.6743802
3 0.11643836 0.1545455 0.02100840 0.00000000 0.05752212 0.2891907 0.4432870 0.4767481 0.2931323 0.4776860
4 0.10616438 0.1818182 0.05882353 0.13157895 0.26991150 0.2903226 0.5694444 0.6032118 0.3185371 0.5966942
5 0.22260274 0.3181818 0.23529412 0.31578947 0.09292035 0.1250707 0.3026620 0.4018066 0.2629816 0.5900826
6 0.10273973 0.1000000 0.06302521 0.11842105 0.15044248 0.2427844 0.2731481 0.3328873 0.2709380 0.4148760
wt
1 0.4812646
2 0.4285714
3 0.3665105
4 0.4344262
5 0.3138173
6 0.3032787
# Make sure the training data is a plain matrix before handing it to keras,
# then preview it again.
x_train <- as.matrix(x_train)
head(x_train)
rcc wcc hc hg ferr bmi ssf pcBfat
1 0.05479452 0.3818182 0.06722689 0.09210526 0.23008850 0.2156197 0.4693287 0.4723988
2 0.20890411 0.4545455 0.09663866 0.14473684 0.26548673 0.2218449 0.4328704 0.5242556
3 0.11643836 0.1545455 0.02100840 0.00000000 0.05752212 0.2891907 0.4432870 0.4767481
4 0.10616438 0.1818182 0.05882353 0.13157895 0.26991150 0.2903226 0.5694444 0.6032118
5 0.22260274 0.3181818 0.23529412 0.31578947 0.09292035 0.1250707 0.3026620 0.4018066
6 0.10273973 0.1000000 0.06302521 0.11842105 0.15044248 0.2427844 0.2731481 0.3328873
lbm ht wt
1 0.4042434 0.7768595 0.4812646
2 0.3376605 0.6743802 0.4285714
3 0.2931323 0.4776860 0.3665105
4 0.3185371 0.5966942 0.4344262
5 0.2629816 0.5900826 0.3138173
6 0.2709380 0.4148760 0.3032787
# Autoencoder with a 2-unit bottleneck: input -> 6 -> 2 -> 6 -> output.
# The layers are added by piping the sequential model into layer_dense().
model <- keras_model_sequential()
model %>%
# encoder: one input per column of x_train, 6 tanh units
layer_dense(units=6, activation="tanh",
input_shape = ncol(x_train)) %>%
# named so that the encoded representation can be looked up by name later
layer_dense(units=2, activation="tanh", name="bottleneck") %>%
# decoder
layer_dense(units=6, activation="tanh") %>%
# output layer has one unit per input column; no activation is specified
layer_dense(units=ncol(x_train))
2019-11-28 21:26:32.725655: I tensorflow/core/platform/cpu_feature_guard.cc:145] This TensorFlow binary is optimized with Intel(R) MKL-DNN to use the following CPU instructions in performance critical operations: AVX AVX2
To enable them in non-MKL-DNN operations, rebuild TensorFlow with the appropriate compiler flags.
2019-11-28 21:26:32.735363: I tensorflow/core/common_runtime/process_util.cc:115] Creating new thread pool with default inter op setting: 8. Tune using inter_op_parallelism_threads for best performance.
# Print the layer structure and parameter counts of the 2-unit autoencoder.
summary(model)
Model: "sequential"
___________________________________________________________________________________________
Layer (type) Output Shape Param #
===========================================================================================
dense (Dense) (None, 6) 72
___________________________________________________________________________________________
bottleneck (Dense) (None, 2) 14
___________________________________________________________________________________________
dense_1 (Dense) (None, 6) 18
___________________________________________________________________________________________
dense_2 (Dense) (None, 11) 77
===========================================================================================
Total params: 181
Trainable params: 181
Non-trainable params: 0
___________________________________________________________________________________________
# Configure training: mean squared reconstruction error, Adam optimizer.
model %>% compile(
loss = "mean_squared_error",
optimizer = "adam"
)
# Train the autoencoder to reproduce its own input.
model %>% fit(
x = x_train,
y = x_train, # the target is the input itself (reconstruction)
epochs = 2000,
verbose = 0
)
# Reconstruction loss of the trained model on the training data.
mse.ae2 <- evaluate(model, x_train, x_train)
202/1 [=========================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================
================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================
=============================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================== loss
====================================================================================================================================================================================================================================================================================================================================================================================================================================================] - 0s 66us/sample - loss: 0.0126
0.009373661
# Extract the encoded layer (bottleneck layer): build a second model that
# shares the trained model's input but stops at the layer named "bottleneck".
intermediate_layer_model <- keras_model(inputs = model$input,
outputs = get_layer(model, "bottleneck")$output)
# Encoded (2-unit) representation of every row of x_train.
intermediate_output <- predict(intermediate_layer_model, x_train)
The plot below shows how the encoded (bottleneck) layer separates males and females. To be honest, I didn't see much difference here compared with the PCA method above.
# Scatter plot of the two bottleneck activations, coloured by sex. The axes
# keep the PC1/PC2 labels used for the PCA plot.
ggplot(
  setNames(as.data.frame(intermediate_output[, 1:2]), c("PC1", "PC2")),
  aes(x = PC1, y = PC2, colour = ais$sex)
) +
  geom_point()
# Autoencoder with a 3-unit bottleneck: input -> 6 -> 3 -> 6 -> output.
# Same layout as the 2-unit model except for the bottleneck width.
model2 <- keras_model_sequential()
model2 %>%
# encoder: one input per column of x_train, 6 tanh units
layer_dense(units=6, activation="tanh",
input_shape = ncol(x_train)) %>%
# named so that the encoded representation can be looked up by name later
layer_dense(units=3, activation="tanh", name="bottleneck") %>%
# decoder
layer_dense(units=6, activation="tanh") %>%
# output layer has one unit per input column; no activation is specified
layer_dense(units=ncol(x_train))
# Print the layer structure and parameter counts of the 3-unit autoencoder.
summary(model2)
___________________________________________________________________________________________
Layer (type) Output Shape Param #
===========================================================================================
dense_3 (Dense) (None, 6) 72
___________________________________________________________________________________________
bottleneck (Dense) (None, 3) 21
___________________________________________________________________________________________
dense_4 (Dense) (None, 6) 24
___________________________________________________________________________________________
dense_5 (Dense) (None, 11) 77
===========================================================================================
Total params: 194
Trainable params: 194
Non-trainable params: 0
___________________________________________________________________________________________
# Compile the model: mean squared reconstruction error, Adam optimizer.
model2 %>% compile(
loss = "mean_squared_error",
optimizer = "adam"
)
# Fit the model; the target is the input itself (reconstruction).
model2 %>% fit(
x = x_train,
y = x_train,
epochs = 2000,
verbose = 0
)
# Evaluate the model: reconstruction loss on the training data.
evaluate(model2, x_train, x_train)
32/202 [===>..........................] - ETA: 1s
202/202 [==============================] - 0s 973us/step
loss
0.006340118
It seems that it does separate better than the PCA above.
# Model that shares model2's input but stops at the layer named "bottleneck".
intermediate_layer_model <- keras_model(inputs = model2$input, outputs = get_layer(model2, "bottleneck")$output)
# Encoded (3-unit) representation of every row of x_train.
intermediate_output <- predict(intermediate_layer_model, x_train)
# Plot the reduced data set: one axis per bottleneck node, coloured by sex.
aedf <- data.frame(node1 = intermediate_output[,1], node2 = intermediate_output[,2], node3 = intermediate_output[,3])
ae_plotly <- plot_ly(aedf, x = ~node1, y = ~node2, z = ~node3, color = ~ais$sex) %>% add_markers()
ae_plotly
minimal value for n is 3, returning requested palette with 3 different levels
minimal value for n is 3, returning requested palette with 3 different levels
minimal value for n is 3, returning requested palette with 3 different levels
minimal value for n is 3, returning requested palette with 3 different levels
# PCA reconstruction
#
# Rebuild the data from the first `k` principal components of a prcomp fit
# and measure how far the reconstruction is from the original data.
#
# pca: object returned by prcomp() (uses $x, $rotation, $center, $scale).
# x:   the data the PCA was fitted on, same dimensions as the reconstruction.
# k:   number of leading components to keep, 0 <= k <= ncol(pca$x).
#      k = 0 reconstructs every row as the column centres.
#
# Returns a list with `x` (the reconstructed matrix) and `mse` (mean squared
# difference between the reconstruction and `x`).
pca.recon <- function(pca, x, k) {
  stopifnot(length(k) == 1, k >= 0, k <= ncol(pca$x))
  keep <- seq_len(k)
  # drop = FALSE keeps both factors as matrices for any k, including 0 and 1.
  recon <- pca$x[, keep, drop = FALSE] %*%
    t(pca$rotation[, keep, drop = FALSE])
  # prcomp() stores FALSE in $scale / $center when that step was not applied;
  # otherwise undo the scaling first, then the centring.
  if (is.numeric(pca$scale)) {
    recon <- sweep(recon, 2, pca$scale, `*`)
  }
  if (is.numeric(pca$center)) {
    recon <- sweep(recon, 2, pca$center, `+`)
  }
  mse <- mean((recon - x)^2)
  list(x = recon, mse = mse)
}
# Reconstruction MSE of the PCA when keeping the first 1, 2, ..., 10
# principal components.
xhat <- vapply(
  1:10,
  function(n_comp) pca.recon(pca, x_train, n_comp)$mse,
  numeric(1)
)
# Autoencoder reconstruction
# The last layer of the autoencoder is the reconstruction of the input.
# For each bottleneck width k = 1..5, train a fresh autoencoder and record
# its reconstruction loss on the training data in ae.mse[k].
ae.mse <- rep(NA, 5)
for(k in 1:5){
# same input -> 6 -> k -> 6 -> output layout as the models above
modelk <- keras_model_sequential()
modelk %>%
layer_dense(units = 6, activation = "tanh", input_shape = ncol(x_train)) %>%
layer_dense(units = k, activation = "tanh", name = "bottleneck") %>%
layer_dense(units = 6, activation = "tanh") %>%
layer_dense(units = ncol(x_train))
modelk %>% compile(
loss = "mean_squared_error",
optimizer = "adam"
)
# the target is the input itself; note 5000 epochs here versus 2000 above
modelk %>% fit(
x = x_train,
y = x_train,
epochs = 5000,
verbose = 0
)
# unname() strips the name from the value returned by evaluate() before storing it
ae.mse[k] <- unname(evaluate(modelk, x_train, x_train))
}
202/1 [=========================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================
================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================
================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================
===================================================================] - 0s 885us/sample - loss: 0.0149
C:\Users\hanhan\ANACON~1\lib\site-packages\h5py\__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.
from ._conv import register_converters as _register_converters
WARNING:tensorflow:Method (on_train_batch_end) is slow compared to the batch update (0.183592). Check your callbacks.
202/1 [=========================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================
================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================
================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================
===================================================================] - 0s 971us/sample - loss: 0.0122
202/1 [=========================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================
================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================
================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================
===================================================================] - 0s 991us/sample - loss: 0.0064
202/1 [=========================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================
================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================
================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================
===================================================================] - 0s 902us/sample - loss: 0.0042
202/1 [=========================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================
================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================
================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================
===================================================================] - 0s 825us/sample - loss: 0.0016
# Stack the two error vectors into one long-format table for plotting:
# the 10 values of `xhat` are labelled "pca" (k = 1..10) and the 5 values
# of `ae.mse` are labelled "autoencoder" (k = 1..5).
df <- data.frame(
  k = c(seq_len(10), seq_len(5)),
  mse = c(xhat, ae.mse),
  method = rep(c("pca", "autoencoder"), times = c(10, 5))
)

# One line per method: mse plotted against k, coloured by method.
ggplot(data = df, mapping = aes(x = k, y = mse, colour = method)) +
  geom_line()